{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "_You are currently looking at **version 1.0** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-data-analysis/resources/0dhYG) course resource._\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Distributions in Pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.binomial(1, 0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.505"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.binomial(1000, 0.5)/1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, ..., 0, 0, 0])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chance_of_tornado = 0.01/100\n",
    "np.random.binomial(100000, chance_of_tornado)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "96 tornadoes back to back in 2739.72602739726 years\n"
     ]
    }
   ],
   "source": [
    "chance_of_tornado = 0.01\n",
    "\n",
    "tornado_events = np.random.binomial(1, chance_of_tornado, 1000000)\n",
    "    \n",
    "two_days_in_a_row = 0\n",
    "for j in range(1,len(tornado_events)-1):\n",
    "    if tornado_events[j]==1 and tornado_events[j-1]==1:\n",
    "        two_days_in_a_row+=1\n",
    "\n",
    "print('{} tornadoes back to back in {} years'.format(two_days_in_a_row, 1000000/365))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.random.uniform(0, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "np.random.normal(0.75)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Formula for standard deviation\n",
    "$$\\sqrt{\\frac{1}{N} \\sum_{i=1}^N (x_i - \\overline{x})^2}$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "distribution = np.random.normal(0.75,size=1000)\n",
    "\n",
    "np.sqrt(np.sum((np.mean(distribution)-distribution)**2)/len(distribution))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "np.std(distribution)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import scipy.stats as stats\n",
    "stats.kurtosis(distribution)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "stats.skew(distribution)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "chi_squared_df2 = np.random.chisquare(2, size=10000)\n",
    "stats.skew(chi_squared_df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "chi_squared_df5 = np.random.chisquare(5, size=10000)\n",
    "stats.skew(chi_squared_df5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "output = plt.hist([chi_squared_df2,chi_squared_df5], bins=50, histtype='step', \n",
    "                  label=['2 degrees of freedom','5 degrees of freedom'])\n",
    "plt.legend(loc='upper right')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hypothesis Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('grades.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>student_id</th>\n",
       "      <th>assignment1_grade</th>\n",
       "      <th>assignment1_submission</th>\n",
       "      <th>assignment2_grade</th>\n",
       "      <th>assignment2_submission</th>\n",
       "      <th>assignment3_grade</th>\n",
       "      <th>assignment3_submission</th>\n",
       "      <th>assignment4_grade</th>\n",
       "      <th>assignment4_submission</th>\n",
       "      <th>assignment5_grade</th>\n",
       "      <th>assignment5_submission</th>\n",
       "      <th>assignment6_grade</th>\n",
       "      <th>assignment6_submission</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>B73F2C11-70F0-E37D-8B10-1D20AFED50B1</td>\n",
       "      <td>92.733946</td>\n",
       "      <td>2015-11-02 06:55:34.282000000</td>\n",
       "      <td>83.030552</td>\n",
       "      <td>2015-11-09 02:22:58.938000000</td>\n",
       "      <td>67.164441</td>\n",
       "      <td>2015-11-12 08:58:33.998000000</td>\n",
       "      <td>53.011553</td>\n",
       "      <td>2015-11-16 01:21:24.663000000</td>\n",
       "      <td>47.710398</td>\n",
       "      <td>2015-11-20 13:24:59.692000000</td>\n",
       "      <td>38.168318</td>\n",
       "      <td>2015-11-22 18:31:15.934000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>98A0FAE0-A19A-13D2-4BB5-CFBFD94031D1</td>\n",
       "      <td>86.790821</td>\n",
       "      <td>2015-11-29 14:57:44.429000000</td>\n",
       "      <td>86.290821</td>\n",
       "      <td>2015-12-06 17:41:18.449000000</td>\n",
       "      <td>69.772657</td>\n",
       "      <td>2015-12-10 08:54:55.904000000</td>\n",
       "      <td>55.098125</td>\n",
       "      <td>2015-12-13 17:32:30.941000000</td>\n",
       "      <td>49.588313</td>\n",
       "      <td>2015-12-19 23:26:39.285000000</td>\n",
       "      <td>44.629482</td>\n",
       "      <td>2015-12-21 17:07:24.275000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D0F62040-CEB0-904C-F563-2F8620916C4E</td>\n",
       "      <td>85.512541</td>\n",
       "      <td>2016-01-09 05:36:02.389000000</td>\n",
       "      <td>85.512541</td>\n",
       "      <td>2016-01-09 06:39:44.416000000</td>\n",
       "      <td>68.410033</td>\n",
       "      <td>2016-01-15 20:22:45.882000000</td>\n",
       "      <td>54.728026</td>\n",
       "      <td>2016-01-11 12:41:50.749000000</td>\n",
       "      <td>49.255224</td>\n",
       "      <td>2016-01-11 17:31:12.489000000</td>\n",
       "      <td>44.329701</td>\n",
       "      <td>2016-01-17 16:24:42.765000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>FFDF2B2C-F514-EF7F-6538-A6A53518E9DC</td>\n",
       "      <td>86.030665</td>\n",
       "      <td>2016-04-30 06:50:39.801000000</td>\n",
       "      <td>68.824532</td>\n",
       "      <td>2016-04-30 17:20:38.727000000</td>\n",
       "      <td>61.942079</td>\n",
       "      <td>2016-05-12 07:47:16.326000000</td>\n",
       "      <td>49.553663</td>\n",
       "      <td>2016-05-07 16:09:20.485000000</td>\n",
       "      <td>49.553663</td>\n",
       "      <td>2016-05-24 12:51:18.016000000</td>\n",
       "      <td>44.598297</td>\n",
       "      <td>2016-05-26 08:09:12.058000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5ECBEEB6-F1CE-80AE-3164-E45E99473FB4</td>\n",
       "      <td>64.813800</td>\n",
       "      <td>2015-12-13 17:06:10.750000000</td>\n",
       "      <td>51.491040</td>\n",
       "      <td>2015-12-14 12:25:12.056000000</td>\n",
       "      <td>41.932832</td>\n",
       "      <td>2015-12-29 14:25:22.594000000</td>\n",
       "      <td>36.929549</td>\n",
       "      <td>2015-12-28 01:29:55.901000000</td>\n",
       "      <td>33.236594</td>\n",
       "      <td>2015-12-29 14:46:06.628000000</td>\n",
       "      <td>33.236594</td>\n",
       "      <td>2016-01-05 01:06:59.546000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             student_id  assignment1_grade  \\\n",
       "0  B73F2C11-70F0-E37D-8B10-1D20AFED50B1          92.733946   \n",
       "1  98A0FAE0-A19A-13D2-4BB5-CFBFD94031D1          86.790821   \n",
       "2  D0F62040-CEB0-904C-F563-2F8620916C4E          85.512541   \n",
       "3  FFDF2B2C-F514-EF7F-6538-A6A53518E9DC          86.030665   \n",
       "4  5ECBEEB6-F1CE-80AE-3164-E45E99473FB4          64.813800   \n",
       "\n",
       "          assignment1_submission  assignment2_grade  \\\n",
       "0  2015-11-02 06:55:34.282000000          83.030552   \n",
       "1  2015-11-29 14:57:44.429000000          86.290821   \n",
       "2  2016-01-09 05:36:02.389000000          85.512541   \n",
       "3  2016-04-30 06:50:39.801000000          68.824532   \n",
       "4  2015-12-13 17:06:10.750000000          51.491040   \n",
       "\n",
       "          assignment2_submission  assignment3_grade  \\\n",
       "0  2015-11-09 02:22:58.938000000          67.164441   \n",
       "1  2015-12-06 17:41:18.449000000          69.772657   \n",
       "2  2016-01-09 06:39:44.416000000          68.410033   \n",
       "3  2016-04-30 17:20:38.727000000          61.942079   \n",
       "4  2015-12-14 12:25:12.056000000          41.932832   \n",
       "\n",
       "          assignment3_submission  assignment4_grade  \\\n",
       "0  2015-11-12 08:58:33.998000000          53.011553   \n",
       "1  2015-12-10 08:54:55.904000000          55.098125   \n",
       "2  2016-01-15 20:22:45.882000000          54.728026   \n",
       "3  2016-05-12 07:47:16.326000000          49.553663   \n",
       "4  2015-12-29 14:25:22.594000000          36.929549   \n",
       "\n",
       "          assignment4_submission  assignment5_grade  \\\n",
       "0  2015-11-16 01:21:24.663000000          47.710398   \n",
       "1  2015-12-13 17:32:30.941000000          49.588313   \n",
       "2  2016-01-11 12:41:50.749000000          49.255224   \n",
       "3  2016-05-07 16:09:20.485000000          49.553663   \n",
       "4  2015-12-28 01:29:55.901000000          33.236594   \n",
       "\n",
       "          assignment5_submission  assignment6_grade  \\\n",
       "0  2015-11-20 13:24:59.692000000          38.168318   \n",
       "1  2015-12-19 23:26:39.285000000          44.629482   \n",
       "2  2016-01-11 17:31:12.489000000          44.329701   \n",
       "3  2016-05-24 12:51:18.016000000          44.598297   \n",
       "4  2015-12-29 14:46:06.628000000          33.236594   \n",
       "\n",
       "          assignment6_submission  \n",
       "0  2015-11-22 18:31:15.934000000  \n",
       "1  2015-12-21 17:07:24.275000000  \n",
       "2  2016-01-17 16:24:42.765000000  \n",
       "3  2016-05-26 08:09:12.058000000  \n",
       "4  2016-01-05 01:06:59.546000000  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2315"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "early = df[df['assignment1_submission'] <= '2015-12-31']\n",
    "late = df[df['assignment1_submission'] > '2015-12-31']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "assignment1_grade    74.972741\n",
       "assignment2_grade    67.252190\n",
       "assignment3_grade    61.129050\n",
       "assignment4_grade    54.157620\n",
       "assignment5_grade    48.634643\n",
       "assignment6_grade    43.838980\n",
       "dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "early.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "assignment1_grade    74.017429\n",
       "assignment2_grade    66.370822\n",
       "assignment3_grade    60.023244\n",
       "assignment4_grade    54.058138\n",
       "assignment5_grade    48.599402\n",
       "assignment6_grade    43.844384\n",
       "dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "late.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from scipy import stats\n",
    "stats.ttest_ind?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ttest_indResult(statistic=1.400549944897566, pvalue=0.16148283016060577)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.ttest_ind(early['assignment1_grade'], late['assignment1_grade'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ttest_indResult(statistic=1.3239868220912567, pvalue=0.18563824610067967)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.ttest_ind(early['assignment2_grade'], late['assignment2_grade'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ttest_indResult(statistic=1.7116160037010733, pvalue=0.087101516341556676)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats.ttest_ind(early['assignment3_grade'], late['assignment3_grade'])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
